Steps:

+ randomly assign each team a quality score, drawn from a normal distribution (mean 100, standard deviation 30)
+ have the teams in each conference play a round robin where the chance of victory is proportional to the quality ratio
+ select the top 8 teams in each conference and divide them into 4 levels of seeds (pots)
+ perform two draws:
    + one cross-conference
    + one where the conferences are drawn into groups separately
+ measure the average quality of opponents in each scenario; record the seed and average opponent quality for both draw types
+ repeat the above 1000 times (see part 2)
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import string
import statistics as stats
# Name the 18 teams of each conference after the first 18 capital letters:
# east_A .. east_R and west_A .. west_R.
east_teams = [f'east_{letter}' for letter in string.ascii_uppercase[:18]]
west_teams = [f'west_{letter}' for letter in string.ascii_uppercase[:18]]
def assign_qualities(teams, loc=100, scale=30):
    """Draw an independent random quality score for every team.

    Scores are sampled from a normal distribution, so an individual draw
    can occasionally be very low or even negative.

    Args:
        teams: Sequence of team names.
        loc: Mean of the quality distribution (default 100).
        scale: Standard deviation of the quality distribution (default 30).

    Returns:
        dict mapping each team name to its sampled quality, in the order
        the teams were given.
    """
    qualities = np.random.normal(loc=loc, scale=scale, size=len(teams))
    return dict(zip(teams, qualities))
# Sample one quality score per team; the east conference is drawn first.
east_qualities = assign_qualities(teams=east_teams)
west_qualities = assign_qualities(teams=west_teams)
def round_robin(teams, team_qualities):
    """Simulate a home-and-away round robin between ``teams``.

    Every ordered pair of distinct teams plays once, so each pairing meets
    twice (once with each side listed as 'home'). For each game the weights
    of the three outcomes are: the home quality, a draw weight equal to the
    mean of both qualities scaled by a uniform(0, 1) draw, and the away
    quality. A win is worth 3 points, a draw 1 point each, a loss 0.

    Args:
        teams: Iterable of team names.
        team_qualities: Mapping from team name to quality score.

    Returns:
        DataFrame with one row per game and the columns 'home', 'away',
        'home_points' and 'away_points'. An empty ``teams`` gives an empty
        frame with those columns.
    """
    columns = ['home', 'away', 'home_points', 'away_points']
    # outcome code -> (home points, away points); 0 = home win, 1 = draw, 2 = away win
    points_by_outcome = {0: (3, 0), 1: (1, 1), 2: (0, 3)}
    games = []
    for home in teams:
        for away in teams:
            if home == away:
                continue
            # Floor the weights at 10 so a very low or negative quality draw
            # still yields a valid, positive probability.
            home_qual = max(team_qualities[home], 10)
            away_qual = max(team_qualities[away], 10)
            draw_weight = (home_qual + away_qual) / 2 * np.random.uniform(0, 1)
            weights = [home_qual, draw_weight, away_qual]
            total = sum(weights)
            probs = [w / total for w in weights]
            outcome = np.random.choice([0, 1, 2], size=1, replace=False, p=probs)[0]
            home_points, away_points = points_by_outcome[outcome]
            games.append({'home': home, 'away': away,
                          'home_points': home_points, 'away_points': away_points})
    # Build the frame once from plain records instead of concatenating one
    # single-row DataFrame per game.
    return pd.DataFrame(games, columns=columns)
# Play out the league season of each conference; east is simulated first.
east_results = round_robin(teams=east_teams, team_qualities=east_qualities)
west_results = round_robin(teams=west_teams, team_qualities=west_qualities)
def standings(teams, results, qualities, label):
    """Turn game results into a league table with seeding pots.

    Args:
        teams: Not used by this function; kept in the signature for callers.
        results: DataFrame of games with the columns 'home', 'away',
            'home_points' and 'away_points'.
        qualities: Mapping from team name to quality score.
        label: Value written to the 'label' column of every row.

    Returns:
        DataFrame sorted by total points (highest first) with the columns
        'league_place', 'team', 'quality', 'total_points', 'pot', 'label'.
        Places 1-8 are assigned 'pot1'..'pot4' (two places per pot); lower
        places have a missing pot.
    """
    output_columns = ['league_place', 'team', 'quality', 'total_points', 'pot', 'label']

    # Places 1-8 map onto four pots of two teams each.
    pot_lookup = pd.DataFrame({
        'league_place': range(1, 9),
        'pot': ['pot' + str(level) for level in (1, 2, 3, 4) for _ in range(2)],
    })

    # Points collected at home and away, joined per team and added up.
    as_home = results.groupby(['home'])['home_points'].sum().reset_index()
    as_away = results.groupby(['away'])['away_points'].sum().reset_index()
    totals = pd.merge(as_home, as_away, left_on='home', right_on='away')
    totals['total_points'] = totals['home_points'] + totals['away_points']

    quality_table = pd.DataFrame.from_dict(qualities, orient='index', columns=['quality'])
    quality_table = quality_table.rename_axis('team').reset_index()

    table = quality_table.merge(totals, left_on='team', right_on='home')
    table = table.sort_values(by='total_points', ascending=False)
    # Position in the sorted table, starting at 1.
    table['league_place'] = range(1, len(table) + 1)
    table['label'] = label
    return table.merge(pot_lookup, how='left', on='league_place')[output_columns]
# Build the final league table (with seeding pots) for each conference.
east_standings = standings(teams=east_teams, results=east_results,
                           qualities=east_qualities, label='east')
west_standings = standings(teams=west_teams, results=west_results,
                           qualities=west_qualities, label='west')
def draw_groups(standings, ngroups, label):
    """Randomly draw the seeded teams into groups, pot by pot.

    Teams without a pot are dropped. The remaining teams are shuffled
    within their pot and dealt out to the groups in turn, so when every pot
    holds exactly ``ngroups`` teams each group receives one team per pot.

    Args:
        standings: A standings DataFrame, or a list/tuple of standings
            DataFrames that are concatenated first. Needs at least the
            columns 'pot' and 'quality'.
        ngroups: Number of groups to draw.
        label: Value written to the 'label' column of the result (replaces
            any existing 'label' values).

    Returns:
        DataFrame of the seeded teams with the added columns 'index' (the
        row label before the draw), 'drawn', 'group', 'group_quality' and
        'opponent_quality'. The input frame(s) are not modified.
    """
    # isinstance also accepts tuples and list subclasses.
    if isinstance(standings, (list, tuple)):
        combined = pd.concat(standings)
    else:
        combined = standings
    # Work on an explicit copy so adding columns never touches the caller's
    # frame and does not depend on pandas' chained-assignment setting.
    seeded = combined[combined['pot'].notna()].copy()
    # Random tie-breaker that decides the order of teams within each pot.
    seeded['drawn'] = np.random.uniform(size=seeded.shape[0])
    seeded = seeded.sort_values(['pot', 'drawn']).reset_index()
    # Deal the sorted teams out to the groups in rotation.
    seeded['group'] = ['group' + str(x % ngroups) for x in seeded.index + 1]
    # Total quality of each group ...
    group_quality = (
        seeded.groupby(['group'])['quality'].sum()
        .reset_index()
        .rename(columns={'quality': 'group_quality'})
    )
    seeded = seeded.merge(group_quality, on='group')
    # ... minus the team's own quality is the summed quality of its opponents.
    seeded['opponent_quality'] = seeded['group_quality'] - seeded['quality']
    seeded['label'] = label
    return seeded
# Single-run draws: one cross-conference draw into 4 groups, then one draw
# per conference into 2 groups each.
east_west_draw = draw_groups(standings=[east_standings, west_standings],
                             ngroups=4, label='combined')
east_draw = draw_groups(standings=east_standings, ngroups=2, label='separate-east')
west_draw = draw_groups(standings=west_standings, ngroups=2, label='separate-west')
# Single simulation run: summed opponent quality against conference finish
# place. The two per-conference draws are pooled into one 'separate' colour.
bind_rows(py$east_west_draw, py$east_draw, py$west_draw) %>%
  mutate(
    draw_type = case_when(
      grepl('separate', label) ~ 'separate',
      TRUE ~ label
    )
  ) %>%
  ggplot(aes(x = league_place, y = opponent_quality, color = draw_type)) +
  geom_point(position = position_jitter(width = 0.2, height = 0)) +
  geom_smooth(method = 'lm') +
  theme(legend.position = "bottom") +
  labs(
    y = 'sum of group stage opponents quality scores',
    x = 'conference finish place',
    title = 'single_simulation_run'
  )
## `geom_smooth()` using formula 'y ~ x'
# Single simulation run: team quality against final table place, coloured
# by conference.
bind_rows(py$east_standings, py$west_standings) %>%
  ggplot(aes(x = quality, y = league_place, color = label)) +
  geom_point() +
  labs(
    x = "team quality", y = "table place", color = "conference",
    title = "Team Quality vs League Table Place"
  ) +
  theme(legend.position = 'bottom') +
  # Use the ColorBrewer 'Dark2' palette through ggplot2's brewer scale.
  # The previous scale_color_discrete(palette('dark2')) called the
  # base-graphics palette() function instead of selecting a ggplot2 palette.
  scale_color_brewer(palette = 'Dark2')
# Rank the teams inside every group of the single-run draws by quality
# (rank 1 = highest quality) and rename the columns for use as "opponent" data.
group_rank <- bind_rows(py$east_west_draw, py$east_draw, py$west_draw) %>%
  arrange(group, desc(quality)) %>%
  group_by(label, group) %>%
  mutate(group_qual_rank = row_number()) %>%
  ungroup() %>%
  select(
    label,
    top_opponent = team,
    group,
    top_quality = quality,
    group_qual_rank
  )

# For every team, find the highest-quality opponent in its group.
with_top <- bind_rows(py$east_west_draw, py$east_draw, py$west_draw) %>%
  # pair each team with every member of its group
  left_join(group_rank, by = c('label', 'group')) %>%
  # drop the rows that pair a team with itself
  filter(team != top_opponent) %>%
  # keep only the best-ranked remaining opponent
  group_by(team, label, group) %>%
  slice(which.min(group_qual_rank)) %>%
  ungroup()
# Single simulation run: quality of the strongest group opponent for the
# teams that finished in the top four places (pots 1 and 2).
with_top %>%
  mutate(
    draw_type = case_when(
      grepl('separate', label) ~ 'separate',
      TRUE ~ label
    )
  ) %>%
  filter(league_place <= 4) %>%
  # x is the numeric finish place, matching the axis label below and letting
  # the linear smooth fit a trend (the character 'pot' column cannot).
  ggplot(aes(x = league_place, y = top_quality, color = draw_type)) +
  geom_point(position = position_jitter(w = 0.2, h = 0)) +
  geom_smooth(method = 'lm') +
  theme(legend.position = "bottom") +
  labs(
    y = 'top opponent quality score',
    x = 'conference finish place',
    title = paste0('single_simulation_run')
  ) +
  scale_color_viridis_d(option = 'inferno')
## `geom_smooth()` using formula 'y ~ x'
# Repeat the whole experiment (qualities -> league -> standings -> draws)
# nsim times with the linear quality weighting and stack the results.
nsim = 1000
standings_list = []
groups_list = []
# create 18 teams in each conference
east_teams = ['east_' + x for x in string.ascii_uppercase[0:18]]
west_teams = ['west_' + x for x in string.ascii_uppercase[0:18]]
for sim in range(nsim):
    # assign fresh qualities to the teams for this run
    east_qualities = assign_qualities(east_teams)
    west_qualities = assign_qualities(west_teams)
    # simulate round-robin league play
    east_results = round_robin(east_teams, east_qualities)
    west_results = round_robin(west_teams, west_qualities)
    # find standings
    east_standings = standings(east_teams, east_results, east_qualities, 'east')
    west_standings = standings(west_teams, west_results, west_qualities, 'west')
    # simulate the cross-conference draw and the two per-conference draws
    east_west_draw = draw_groups([east_standings, west_standings], 4, 'combined')
    east_draw = draw_groups(east_standings, 2, 'separate-east')
    west_draw = draw_groups(west_standings, 2, 'separate-west')
    # tag every row with the simulation number before collecting it
    standings_temp = pd.concat([east_standings, west_standings])
    standings_temp['sim'] = str(sim)
    standings_list.append(standings_temp)
    groups_temp = pd.concat([east_west_draw, east_draw, west_draw])
    groups_temp['sim'] = str(sim)
    groups_list.append(groups_temp)
# Every per-run frame restarts its row labels at 0; ignore_index=True gives
# the stacked frames a unique index, which avoids the "index contains
# duplicated values" warning when the frames are converted to R.
sim_standings = pd.concat(standings_list, ignore_index=True)
sim_groups = pd.concat(groups_list, ignore_index=True)
# All simulations, linear weighting: summed opponent quality against
# conference finish place, with the two per-conference draws pooled.
py$sim_groups %>%
  mutate(
    draw_type = case_when(
      grepl('separate', label) ~ 'separate',
      TRUE ~ label
    )
  ) %>%
  ggplot(aes(x = league_place, y = opponent_quality, color = draw_type)) +
  geom_point(position = position_jitter(width = 0.1, height = 0), alpha = .1) +
  geom_smooth(method = 'lm') +
  theme(legend.position = "bottom") +
  labs(
    y = 'sum of group stage opponents quality scores',
    x = 'conference finish place',
    title = paste0(py$nsim, ' simulations results - total group opponent quality\nlinear quality matchup weighting')
  )
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
## `geom_smooth()` using formula 'y ~ x'
# All simulations, linear weighting: team quality against table place,
# coloured by conference.
py$sim_standings %>%
  ggplot(aes(x = quality, y = league_place, color = label)) +
  geom_point(position = position_jitter(width = 0, height = .1), alpha = .1) +
  labs(
    x = "team quality",
    y = "table place",
    color = "conference",
    title = paste0("Team Quality vs League Table Place: ", py$nsim, ' simulations\nlinear quality matchup weighting')
  ) +
  theme(legend.position = 'bottom') +
  scale_color_brewer(palette = 'Dark2')
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# Rank the teams inside every group of every simulation by quality
# (rank 1 = highest quality), with columns renamed for use as opponent data.
group_rank <- py$sim_groups %>%
  arrange(sim, label, group, desc(quality)) %>%
  group_by(sim, label, group) %>%
  mutate(group_qual_rank = row_number()) %>%
  ungroup() %>%
  select(
    label,
    sim,
    top_opponent = team,
    group,
    top_quality = quality,
    group_qual_rank
  )
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# For every team in every simulation, find its highest-quality group opponent.
with_top <- py$sim_groups %>%
  # pair each team with every member of its group
  left_join(group_rank, by = c('sim', 'label', 'group')) %>%
  # drop the rows that pair a team with itself
  filter(team != top_opponent) %>%
  # keep only the best-ranked remaining opponent for each team
  group_by(team, sim, label, group) %>%
  slice(which.min(group_qual_rank)) %>%
  ungroup()
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# All simulations, linear weighting: quality of the strongest group
# opponent for teams that finished in the top four places.
with_top %>%
  mutate(
    draw_type = case_when(
      grepl('separate', label) ~ 'separate',
      TRUE ~ label
    )
  ) %>%
  filter(league_place <= 4) %>%
  ggplot(aes(x = league_place, y = top_quality, color = draw_type)) +
  geom_point(position = position_jitter(width = 0.2, height = 0), alpha = .2) +
  geom_smooth(method = 'lm') +
  theme(legend.position = "bottom") +
  labs(
    y = 'top opponent quality score',
    x = 'conference finish place',
    title = paste0(py$nsim, ' simulations results - top group opponent quality for pots 1 and 2\nlinear quality matchup weighting')
  ) +
  scale_color_viridis_d(option = 'inferno')
## `geom_smooth()` using formula 'y ~ x'
# Mean summed opponent quality per finish place and draw label.
py$sim_groups %>%
  group_by(league_place, label) %>%
  summarize(average_opponent_quality = mean(opponent_quality))
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
## `summarise()` has grouped output by 'league_place'. You can override using the
## `.groups` argument.
## # A tibble: 24 × 3
## # Groups: league_place [8]
## league_place label average_opponent_quality
## <dbl> <chr> <dbl>
## 1 1 combined 337.
## 2 1 separate-east 337.
## 3 1 separate-west 336.
## 4 2 combined 336.
## 5 2 separate-east 338.
## 6 2 separate-west 336.
## 7 3 combined 345.
## 8 3 separate-east 346.
## 9 3 separate-west 346.
## 10 4 combined 346.
## # … with 14 more rows
def round_robin_cu(teams, team_qualities):
    """Simulate a home-and-away round robin with cubed quality weighting.

    Every ordered pair of distinct teams plays once, so each pairing meets
    twice. Each team's outcome weight is its quality cubed (floored at 10);
    the draw weight is the mean of the two cubed weights scaled by a
    uniform(0, 1) draw. A win is worth 3 points, a draw 1 point each.

    Args:
        teams: Iterable of team names.
        team_qualities: Mapping from team name to quality score.

    Returns:
        DataFrame with one row per game and the columns 'home', 'away',
        'home_points' and 'away_points'. An empty ``teams`` gives an empty
        frame with those columns.
    """
    columns = ['home', 'away', 'home_points', 'away_points']
    # outcome code -> (home points, away points); 0 = home win, 1 = draw, 2 = away win
    points_by_outcome = {0: (3, 0), 1: (1, 1), 2: (0, 3)}
    games = []
    for home in teams:
        for away in teams:
            if home == away:
                continue
            # Cubing keeps the sign, so a negative quality gives a negative
            # cube; the floor of 10 keeps every weight positive.
            home_qual = max(team_qualities[home] ** 3, 10)
            away_qual = max(team_qualities[away] ** 3, 10)
            draw_weight = (home_qual + away_qual) / 2 * np.random.uniform(0, 1)
            weights = [home_qual, draw_weight, away_qual]
            total = sum(weights)
            probs = [w / total for w in weights]
            outcome = np.random.choice([0, 1, 2], size=1, replace=False, p=probs)[0]
            home_points, away_points = points_by_outcome[outcome]
            games.append({'home': home, 'away': away,
                          'home_points': home_points, 'away_points': away_points})
    # Build the frame once from plain records instead of concatenating one
    # single-row DataFrame per game.
    return pd.DataFrame(games, columns=columns)
# Repeat the whole experiment nsim times with the cubed quality weighting
# (round_robin_cu) and stack the results.
nsim = 1000
standings_list_cu = []
groups_list_cu = []
# create 18 teams in each conference
east_teams = ['east_' + x for x in string.ascii_uppercase[0:18]]
west_teams = ['west_' + x for x in string.ascii_uppercase[0:18]]
for sim in range(nsim):
    # assign fresh qualities to the teams for this run
    east_qualities = assign_qualities(east_teams)
    west_qualities = assign_qualities(west_teams)
    # simulate round-robin league play
    east_results = round_robin_cu(east_teams, east_qualities)
    west_results = round_robin_cu(west_teams, west_qualities)
    # find standings
    east_standings = standings(east_teams, east_results, east_qualities, 'east')
    west_standings = standings(west_teams, west_results, west_qualities, 'west')
    # simulate the cross-conference draw and the two per-conference draws
    east_west_draw = draw_groups([east_standings, west_standings], 4, 'combined')
    east_draw = draw_groups(east_standings, 2, 'separate-east')
    west_draw = draw_groups(west_standings, 2, 'separate-west')
    # tag every row with the simulation number before collecting it
    standings_temp = pd.concat([east_standings, west_standings])
    standings_temp['sim'] = str(sim)
    standings_list_cu.append(standings_temp)
    groups_temp = pd.concat([east_west_draw, east_draw, west_draw])
    groups_temp['sim'] = str(sim)
    groups_list_cu.append(groups_temp)
# Every per-run frame restarts its row labels at 0; ignore_index=True gives
# the stacked frames a unique index, which avoids the "index contains
# duplicated values" warning when the frames are converted to R.
sim_standings_cu = pd.concat(standings_list_cu, ignore_index=True)
sim_groups_cu = pd.concat(groups_list_cu, ignore_index=True)
# All simulations, cubed weighting: summed opponent quality against
# conference finish place, with the two per-conference draws pooled.
py$sim_groups_cu %>%
  mutate(
    draw_type = case_when(
      grepl('separate', label) ~ 'separate',
      TRUE ~ label
    )
  ) %>%
  ggplot(aes(x = league_place, y = opponent_quality, color = draw_type)) +
  geom_point(position = position_jitter(width = 0.1, height = 0), alpha = .1) +
  geom_smooth(method = 'lm') +
  theme(legend.position = "bottom") +
  labs(
    y = 'sum of group stage opponents quality scores',
    x = 'conference finish place',
    title = paste0(py$nsim, ' simulations results - total group opponent quality\ncubed quality matchup weighting')
  )
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
## `geom_smooth()` using formula 'y ~ x'
# All simulations, cubed weighting: team quality against table place,
# coloured by conference.
py$sim_standings_cu %>%
  ggplot(aes(x = quality, y = league_place, color = label)) +
  geom_point(position = position_jitter(width = 0, height = .1), alpha = .1) +
  labs(
    x = "team quality",
    y = "table place",
    color = "conference",
    title = paste0("Team Quality vs League Table Place: ", py$nsim, ' simulations\ncubed quality matchup weighting')
  ) +
  theme(legend.position = 'bottom') +
  scale_color_brewer(palette = 'Dark2')
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# Rank the teams inside every group of every cubed-weighting simulation by
# quality (rank 1 = highest quality).
group_rank <- py$sim_groups_cu %>%
  arrange(sim, label, group, desc(quality)) %>%
  group_by(sim, label, group) %>%
  mutate(group_qual_rank = row_number()) %>%
  ungroup() %>%
  select(
    label,
    sim,
    top_opponent = team,
    group,
    top_quality = quality,
    group_qual_rank
  )
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# For every team in every cubed-weighting simulation, find its
# highest-quality group opponent.
with_top <- py$sim_groups_cu %>%
  # pair each team with every member of its group
  left_join(group_rank, by = c('sim', 'label', 'group')) %>%
  # drop the rows that pair a team with itself
  filter(team != top_opponent) %>%
  # keep only the best-ranked remaining opponent for each team
  group_by(team, sim, label, group) %>%
  slice(which.min(group_qual_rank)) %>%
  ungroup()
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# All simulations, cubed weighting: quality of the strongest group opponent
# for teams that finished in the top four places.
with_top %>%
  mutate(
    draw_type = case_when(
      grepl('separate', label) ~ 'separate',
      TRUE ~ label
    )
  ) %>%
  filter(league_place <= 4) %>%
  ggplot(aes(x = league_place, y = top_quality, color = draw_type)) +
  geom_point(position = position_jitter(width = 0.2, height = 0), alpha = .2) +
  geom_smooth(method = 'lm') +
  theme(legend.position = "bottom") +
  labs(
    y = 'top opponent quality score',
    x = 'conference finish place',
    title = paste0(py$nsim, ' simulations results - top group opponent quality for pots 1 and 2\ncubed quality matchup weighting')
  ) +
  scale_color_viridis_d(option = 'inferno')
## `geom_smooth()` using formula 'y ~ x'
from math import e
def round_robin_exp(teams, team_qualities):
    """Simulate a home-and-away round robin with exponential weighting.

    Every ordered pair of distinct teams plays once, so each pairing meets
    twice. Each team's outcome weight is e raised to its quality (floored
    at 10); the draw weight is the mean of the two weights scaled by a
    uniform(0, 1) draw. A win is worth 3 points, a draw 1 point each.

    Args:
        teams: Iterable of team names.
        team_qualities: Mapping from team name to quality score.

    Returns:
        DataFrame with one row per game and the columns 'home', 'away',
        'home_points' and 'away_points'. An empty ``teams`` gives an empty
        frame with those columns.
    """
    columns = ['home', 'away', 'home_points', 'away_points']
    # outcome code -> (home points, away points); 0 = home win, 1 = draw, 2 = away win
    points_by_outcome = {0: (3, 0), 1: (1, 1), 2: (0, 3)}
    games = []
    for home in teams:
        for away in teams:
            if home == away:
                continue
            # e**quality is always positive; the floor of 10 only matters
            # for qualities below ln(10).
            home_qual = max(e ** team_qualities[home], 10)
            away_qual = max(e ** team_qualities[away], 10)
            draw_weight = (home_qual + away_qual) / 2 * np.random.uniform(0, 1)
            weights = [home_qual, draw_weight, away_qual]
            total = sum(weights)
            probs = [w / total for w in weights]
            outcome = np.random.choice([0, 1, 2], size=1, replace=False, p=probs)[0]
            home_points, away_points = points_by_outcome[outcome]
            games.append({'home': home, 'away': away,
                          'home_points': home_points, 'away_points': away_points})
    # Build the frame once from plain records instead of concatenating one
    # single-row DataFrame per game.
    return pd.DataFrame(games, columns=columns)
# Repeat the whole experiment nsim times with the exponential quality
# weighting (round_robin_exp) and stack the results.
nsim = 1000
standings_list_exp = []
groups_list_exp = []
# create 18 teams in each conference
east_teams = ['east_' + x for x in string.ascii_uppercase[0:18]]
west_teams = ['west_' + x for x in string.ascii_uppercase[0:18]]
for sim in range(nsim):
    # assign fresh qualities to the teams for this run
    east_qualities = assign_qualities(east_teams)
    west_qualities = assign_qualities(west_teams)
    # simulate round-robin league play
    east_results = round_robin_exp(east_teams, east_qualities)
    west_results = round_robin_exp(west_teams, west_qualities)
    # find standings
    east_standings = standings(east_teams, east_results, east_qualities, 'east')
    west_standings = standings(west_teams, west_results, west_qualities, 'west')
    # simulate the cross-conference draw and the two per-conference draws
    east_west_draw = draw_groups([east_standings, west_standings], 4, 'combined')
    east_draw = draw_groups(east_standings, 2, 'separate-east')
    west_draw = draw_groups(west_standings, 2, 'separate-west')
    # tag every row with the simulation number before collecting it
    standings_temp = pd.concat([east_standings, west_standings])
    standings_temp['sim'] = str(sim)
    standings_list_exp.append(standings_temp)
    groups_temp = pd.concat([east_west_draw, east_draw, west_draw])
    groups_temp['sim'] = str(sim)
    groups_list_exp.append(groups_temp)
# Every per-run frame restarts its row labels at 0; ignore_index=True gives
# the stacked frames a unique index, which avoids the "index contains
# duplicated values" warning when the frames are converted to R.
sim_standings_exp = pd.concat(standings_list_exp, ignore_index=True)
sim_groups_exp = pd.concat(groups_list_exp, ignore_index=True)
# All simulations, exponential weighting: summed opponent quality against
# conference finish place, with the two per-conference draws pooled.
py$sim_groups_exp %>%
  mutate(
    draw_type = case_when(
      grepl('separate', label) ~ 'separate',
      TRUE ~ label
    )
  ) %>%
  ggplot(aes(x = league_place, y = opponent_quality, color = draw_type)) +
  geom_point(position = position_jitter(width = 0.1, height = 0), alpha = .1) +
  geom_smooth(method = 'lm') +
  theme(legend.position = "bottom") +
  labs(
    y = 'sum of group stage opponents quality scores',
    x = 'conference finish place',
    title = paste0(py$nsim, ' simulations results - total group opponent quality\nexponential matchup weighting')
  )
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
## `geom_smooth()` using formula 'y ~ x'
# All simulations, exponential weighting: team quality against table place,
# coloured by conference.
py$sim_standings_exp %>%
  ggplot(aes(x = quality, y = league_place, color = label)) +
  geom_point(position = position_jitter(width = 0, height = .1), alpha = .1) +
  labs(
    x = "team quality",
    y = "table place",
    color = "conference",
    title = paste0("Team Quality vs League Table Place: ", py$nsim, ' simulations\nexponential matchup weighting')
  ) +
  theme(legend.position = 'bottom') +
  scale_color_brewer(palette = 'Dark2')
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# Rank the teams inside every group of every exponential-weighting
# simulation by quality (rank 1 = highest quality).
group_rank <- py$sim_groups_exp %>%
  arrange(sim, label, group, desc(quality)) %>%
  group_by(sim, label, group) %>%
  mutate(group_qual_rank = row_number()) %>%
  ungroup() %>%
  select(
    label,
    sim,
    top_opponent = team,
    group,
    top_quality = quality,
    group_qual_rank
  )
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# For every team in every exponential-weighting simulation, find its
# highest-quality group opponent.
with_top <- py$sim_groups_exp %>%
  # pair each team with every member of its group
  left_join(group_rank, by = c('sim', 'label', 'group')) %>%
  # drop the rows that pair a team with itself
  filter(team != top_opponent) %>%
  # keep only the best-ranked remaining opponent for each team
  group_by(team, sim, label, group) %>%
  slice(which.min(group_qual_rank)) %>%
  ungroup()
## Warning in py_to_r.pandas.core.frame.DataFrame(x): index contains duplicated
## values: row names not set
# All simulations, exponential weighting: quality of the strongest group
# opponent for teams that finished in the top four places.
with_top %>%
  mutate(
    draw_type = case_when(
      grepl('separate', label) ~ 'separate',
      TRUE ~ label
    )
  ) %>%
  filter(league_place <= 4) %>%
  ggplot(aes(x = league_place, y = top_quality, color = draw_type)) +
  geom_point(position = position_jitter(width = 0.2, height = 0), alpha = .2) +
  geom_smooth(method = 'lm') +
  theme(legend.position = "bottom") +
  labs(
    y = 'top opponent quality score',
    x = 'conference finish place',
    title = paste0(py$nsim, ' simulations results - top group opponent quality for pots 1 and 2\nexponential matchup weighting')
  ) +
  scale_color_viridis_d(option = 'inferno')
## `geom_smooth()` using formula 'y ~ x'